# 說明:本文為教學示例,不代表法遵建議;實務上請遵循所屬公司/地區的資安與隱私規範(如個資法、GDPR 等)。
import csv, re, os, hashlib
SALT = "demo_salt_2025"  # Demo value: real projects should load the salt from an environment variable, never hard-code it
# --- Luhn validation (credit-card checksum) ---
def luhn_check(number: str) -> bool:
    """Return True when the decimal digits in *number* pass the Luhn checksum.

    Every character that is not a decimal digit (spaces, hyphens, letters,
    superscripts, ...) is ignored, so "4111 1111 1111 1111" and
    "4111-1111-1111-1111" are treated alike.

    Args:
        number: Card number text, possibly containing separators.

    Returns:
        False when *number* contains no decimal digit at all; otherwise
        whether the Luhn sum of the digits is a multiple of 10.
    """
    # str.isdecimal() (not str.isdigit()) is used on purpose: isdigit() also
    # accepts characters such as "²" or "①" that int() cannot convert, which
    # would raise ValueError. isdecimal() still accepts full-width digits.
    digits = [int(ch) for ch in number if ch.isdecimal()]
    if not digits:
        return False

    total = 0
    for pos, d in enumerate(reversed(digits)):
        if pos % 2 == 1:  # every second digit, counting from the right
            d *= 2
            if d > 9:  # same as adding the two digits of the product
                d -= 9
        total += d
    return total % 10 == 0
# --- Basic format check ---
# Shape-only e-mail pattern: a local part made of ASCII letters, digits and
# "._%+-", an "@", a domain made of ASCII letters, digits, dots and hyphens,
# then a final dot followed by at least two ASCII letters.
# It does not attempt full RFC validation.
EMAIL_RE = re.compile(r'^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$')
# --- Masking helpers (keep only the minimum readable information) ---
def mask_email(e: str) -> str:
    """Mask the local part of an e-mail address, keeping the domain.

    Args:
        e: The address to mask.

    Returns:
        "***" when *e* contains no "@". Otherwise the text before the first
        "@" is masked and the rest is kept verbatim:
        - up to two characters: first character (if any) followed by "*";
        - longer: first and last character with "*" for everything between.
    """
    if "@" not in e:
        return "***"

    name, domain = e.split("@", 1)
    if len(name) <= 2:
        # name[:1] instead of name[0]: an empty local part ("@example.com")
        # must not raise IndexError; it is rendered as a single "*".
        name_mask = name[:1] + "*"
    else:
        name_mask = name[0] + "*" * (len(name) - 2) + name[-1]
    return f"{name_mask}@{domain}"
def mask_phone(p: str) -> str:
    """Mask a phone number so that only its last four digits stay readable.

    Non-digit characters are dropped first. Returns "***" when fewer than
    four digits remain; otherwise one "*" per hidden digit followed by the
    final four digits.
    """
    only_digits = "".join(filter(str.isdigit, p))
    hidden = len(only_digits) - 4
    if hidden < 0:
        return "***"
    return "*" * hidden + only_digits[hidden:]
def mask_card(c: str) -> str:
    """Mask a card number, keeping its first six and last four digits.

    Non-digit characters are dropped first. Returns "***" when fewer than
    ten digits remain; otherwise the first six digits, one "*" per hidden
    digit, and the last four digits.
    """
    pan = "".join(filter(str.isdigit, c))
    if len(pan) >= 10:
        head, tail = pan[:6], pan[-4:]
        return head + "*" * (len(pan) - 10) + tail
    # Too short to show anything.
    return "***"
def pseudo_id(email_or_key: str) -> str:
    """Derive a stable pseudonymous customer ID from an e-mail or other key.

    The module-level SALT is prepended to the key (a falsy key counts as ""),
    the combined text is lower-cased, hashed with SHA-256, and the first 12
    hex characters of the digest are returned behind a "cust_" prefix.
    The same SALT and key therefore always give the same ID.
    """
    key = email_or_key if email_or_key else ""
    material = (SALT + key).lower()
    digest = hashlib.sha256(material.encode()).hexdigest()
    return f"cust_{digest[:12]}"
# --- Prepare the sample input file (created only if it does not exist yet) ---
IN_CSV = "customers.csv"
if not os.path.exists(IN_CSV):
    # Header row followed by three demo customers.
    demo_rows = [
        ["name", "email", "phone", "card_number"],
        ["王小明", "ming.wang@example.com", "0912-345-678", "4111 1111 1111 1111"],
        ["陳小姐", "chenny@example", "02-8765-4321", "3566-0020-0000-0410"],
        ["李O爸", "li_dad@mail.com", "+886-987-000-111", "1234-5678-0000-0000"],
    ]
    with open(IN_CSV, "w", newline="", encoding="utf-8") as demo_file:
        csv.writer(demo_file).writerows(demo_rows)
    print("已建立示範 customers.csv")
OUT_CSV = "customers_sanitized.csv"
# Running totals of rows that failed each validation.
issues_counter = {"email_invalid": 0, "card_invalid": 0, "phone_short": 0}


def _cell(row: dict, key: str) -> str:
    """Return row[key] stripped of surrounding whitespace.

    csv.DictReader fills the missing cells of a short row with None, and a
    column may be absent altogether; both cases are returned as "".
    """
    return (row.get(key) or "").strip()


# Read IN_CSV row by row, validate each record, and write a masked copy to
# OUT_CSV together with per-row validity flags.
# - "utf-8-sig" decodes plain UTF-8 as well, but also drops a leading BOM;
#   with a BOM left in place the first header would be "\ufeffname" and the
#   name column would silently come out empty.
# - newline="" lets the csv module handle line endings inside quoted cells.
with open(IN_CSV, newline="", encoding="utf-8-sig") as f_in, \
        open(OUT_CSV, "w", newline="", encoding="utf-8") as f_out:
    reader = csv.DictReader(f_in)
    fields = ["cust_id", "name_masked", "email_masked", "phone_masked", "card_masked",
              "is_email_valid", "is_card_valid", "issues"]
    writer = csv.DictWriter(f_out, fieldnames=fields)
    writer.writeheader()

    for row in reader:
        name = _cell(row, "name")
        email = _cell(row, "email")
        phone = _cell(row, "phone")
        card = _cell(row, "card_number")

        # Validation
        valid_email = bool(EMAIL_RE.match(email))
        valid_card = luhn_check(card)
        phone_digits = ''.join(ch for ch in phone if ch.isdigit())

        issue_list = []
        if not valid_email:
            issues_counter["email_invalid"] += 1
            issue_list.append("email")
        if not valid_card:
            issues_counter["card_invalid"] += 1
            issue_list.append("card")
        if len(phone_digits) < 9:  # fewer than nine digits is flagged as too short
            issues_counter["phone_short"] += 1
            issue_list.append("phone")

        # Pseudonymisation and masking
        # The ID is keyed on the e-mail, falling back to the name when the
        # e-mail cell is empty.
        cust_id = pseudo_id(email or name)
        # Keep the first character and emit at least one "*", so a
        # one-character name becomes "X*" and an empty name becomes "*".
        name_masked = name[:1] + "*" * max(len(name) - 1, 1)

        writer.writerow({
            "cust_id": cust_id,
            "name_masked": name_masked,
            "email_masked": mask_email(email),
            "phone_masked": mask_phone(phone),
            "card_masked": mask_card(card),
            "is_email_valid": int(valid_email),
            "is_card_valid": int(valid_card),
            "issues": ",".join(issue_list),
        })

print("✅ 已輸出遮罩報表:", OUT_CSV)
print("問題統計:", issues_counter)